## Statistical Summaries of Open Paths and Census data
## Andrew Reeves and Ryan T. Moore
## First: 26 March 2014
## Last: 19 January 2020

library(ggplot2)
library(Hmisc)
library(plyr)
library(car)
library(xtable)

## Load user-level data:
load("openPaths_userlevel_full.RData")  

## Name some areas
data$place <- rep("Other", nrow(data))
data$place[data$county.fips == "36061"] <- "New York, NY"
data$place[data$county.fips == "06075"] <- "San Francisco, CA"
data$place[data$county.fips == "53033"] <- "King County, WA (Seattle)"

## Plot dynamic pctblack, pcthisp, pctasian on 5 static geographies

plotDynOnStatics <- function(race.vars, static.geos){
  
  for(race.idx in race.vars){
    if(race.idx == "pctwhite"){race.pretty <- "White"}
    if(race.idx == "pctblack"){race.pretty <- "Black"}
    if(race.idx == "pcthisp"){race.pretty <- "Hispanic"}
    if(race.idx == "pctasian"){race.pretty <- "Asian"}
      
    for(static.idx in static.geos){
      if(static.idx == "blk"){static.pretty <- "Block"}
      if(static.idx == "blkg"){static.pretty <- "Block Group"}
      if(static.idx == "tract"){static.pretty <- "Tract"}
      if(static.idx == "cty"){static.pretty <- "County"}
      if(static.idx == "st"){static.pretty <- "State"}
      
      x.var <- paste(race.idx, ".", static.idx, "*100", sep = "")
      y.var <- paste(race.idx, ".dblk*100", sep = "")
           
      filen <- paste("raceSmooth", race.idx, static.idx, ".pdf", sep = "")
      pdf(filen)
      print(ggplot(data, aes_string(x = x.var, y = y.var)) + geom_point() + geom_smooth(method = "lm", col = "blue") + 
              xlim(0,100) + ylim(0, 100) + geom_abline() + geom_smooth(col = "red") +
              xlab(paste("% ", race.pretty, ", Static ", static.pretty, sep = "")) + 
              ylab(paste("% ", race.pretty, ", Dynamic Block", sep = "")) +
              theme(axis.title = force(element_text(size = rel(1.5)))))
      dev.off()
    }
  }
}

allraces <- c("pctwhite", "pctblack", "pcthisp", "pctasian")
allgeos <- c("blk", "blkg", "tract", "cty", "st")

## Running the function below creates the 20 constituent plots for 
##  Paper Figure 3 and related Appendix Figures 7, 8, and 9.
## It creates each plot in a .pdf file, stored in a 
##  working directory.

## WARNING: Running the function will overwrite the plots provided in that directory.
plotDynOnStatics(allraces, allgeos)


## "For example, the distribution of percent white in the census blocks our median respondent 
##   experiences has a standard deviation of about 21 percentage points, ..."

## Dist of SDs in dyn blocks:
#ggplot(data, aes(x = sdpctwhite.dblk)) + geom_density()
#summary(data$sdpctwhite.dblk)


## Appendix Table 2 Statistics

calcDynOnStatics <- function(race.vars, static.geos){
  
  stats <- data.frame(race = NA, geo = NA, stat = NA, value = NA)
  for(race.idx in race.vars){
    for(static.idx in static.geos){
#      x.var <- paste("I(", race.idx, ".", static.idx, "*100)", sep = "")
#      y.var <- paste("I(", race.idx, ".dblk*100)", sep = "")
      ff <- as.formula(paste("I(", race.idx, ".dblk*100) ~ I(", race.idx, ".", static.idx, "*100)", sep = ""))
      lm.out <- lm(ff, data = data, y = TRUE)
      rmse.tmp <- sqrt(mean((lm.out$fitted - lm.out$y)^2))
      this.model <- data.frame(race = race.idx, geo = static.idx, stat = c("Intercept", "Slope", "RMSE"), 
                               value = c(unname(coef(lm.out)), rmse.tmp))
      
    stats <- data.frame(rbind(stats, this.model))
    }
  }
  stats <- stats[!(apply((apply(stats, 1, is.na)), 2, mean) == 1), ]
  names(stats) <- c("Race", "Geography", "Statistic", "Value")
  stats$Race <- substring(stats$Race, first = 4)
  stats$Race <- capitalize(stats$Race)
  stats$Geography <- recode(stats$Geography, "'blk'='Block'; 'blkg'='Block Group'; 'tract'='Tract'; 'cty'='County'; 'st'='State'")
  stats$Race[stats$Race == "Hisp"] <- "Hispanic"
  
  return(stats)
}

stats <- calcDynOnStatics(allraces, allgeos)
summ.stats <- ddply(stats, .(Race, Statistic), summarise, mean = mean(Value), sd = sd(Value))

## Create Appendix Table 2:
print(xtable(stats, digits = 1), include.rownames = FALSE, file = "Table-2.tex")
